
//Chris' WinGlide enhancements
//Copr. 1998, Chris Dohnal (cwdohnal@ucdavis.edu)

#include <windows.h>

//Forward declarations of the two pitch-aware copy routines defined later in this file.
//Arguments, in order: source pointer, destination pointer, source pitch,
//destination pitch, number of eight byte pieces per scanline, number of scanlines.
VOID FPUPitchCopy8(BYTE *, BYTE *, LONG, LONG, LONG, LONG);
VOID MMXPitchCopy8(BYTE *, BYTE *, LONG, LONG, LONG, LONG);

//Copies a rectangle of memory scanline by scanline, moving eight bytes at a
//time through the x87 FPU (64-bit integer load followed by 64-bit integer store).
//
//	pSource      - first byte of the first source scanline
//	pDest        - first byte of the first destination scanline
//	lSourcePitch - byte offset added to the source pointer after each scanline
//	lDestPitch   - byte offset added to the destination pointer after each scanline
//	lXCount8     - number of eight byte pieces copied per scanline
//	lYCount      - number of scanlines to copy
//
//Only whole eight byte pieces are transferred. Nothing is copied when either
//count is zero or negative.
VOID FPUPitchCopy8(BYTE *pSource, BYTE *pDest, LONG lSourcePitch, LONG lDestPitch, LONG lXCount8, LONG lYCount) {

	//Both counts must be positive; the loops below always run at least once
	if ((lXCount8 <= 0) || (lYCount <= 0)) {
		return;
	}

	__asm {
		//esi / edi track the start of the current source / destination scanline
		mov esi, pSource
		mov edi, pDest

		//edx = scanlines still to be copied
		mov edx, lYCount

		fpu_row:

			//ecx = index of the next eight byte piece within this scanline
			xor ecx, ecx

			fpu_piece:
				//Move one eight byte piece through the top of the FPU stack
				//NOTE(review): relies on fild/fistp reproducing every 64-bit pattern exactly - confirm
				fild QWORD PTR [esi + ecx*8]
				fistp QWORD PTR [edi + ecx*8]

				//Keep going until lXCount8 pieces have been moved
				inc ecx
				cmp ecx, lXCount8
				jl fpu_piece

			//Step both pointers to the next scanline
			add esi, lSourcePitch
			add edi, lDestPitch

			dec edx
			jnz fpu_row
	}
}

//Copies a rectangle of memory scanline by scanline, moving eight bytes at a
//time through MMX register mm0.
//
//	pSource      - first byte of the first source scanline
//	pDest        - first byte of the first destination scanline
//	lSourcePitch - byte offset added to the source pointer after each scanline
//	lDestPitch   - byte offset added to the destination pointer after each scanline
//	lXCount8     - number of eight byte pieces copied per scanline
//	lYCount      - number of scanlines to copy
//
//Only whole eight byte pieces are transferred. Nothing is copied (and no MMX
//instruction is executed) when either count is zero or negative.
VOID MMXPitchCopy8(BYTE *pSource, BYTE *pDest, LONG lSourcePitch, LONG lDestPitch, LONG lXCount8, LONG lYCount) {

	//Both counts must be positive; the loops below always run at least once
	if ((lXCount8 <= 0) || (lYCount <= 0)) {
		return;
	}

	__asm {
		//esi / edi track the start of the current source / destination scanline
		mov esi, pSource
		mov edi, pDest

		//edx = scanlines still to be copied
		mov edx, lYCount

		mmx_row:

			//ecx = index of the next eight byte piece within this scanline
			xor ecx, ecx

			mmx_piece:
				//Move one eight byte piece through mm0
				movq mm0, QWORD PTR [esi + ecx*8]
				movq QWORD PTR [edi + ecx*8], mm0

				//Keep going until lXCount8 pieces have been moved
				inc ecx
				cmp ecx, lXCount8
				jl mmx_piece

			//Step both pointers to the next scanline
			add esi, lSourcePitch
			add edi, lDestPitch

			dec edx
			jnz mmx_row

		//Empty the MMX state once all scanlines are done
		emms
	}
}
